# Importar pandas
import pandas as pd
# Read the local COVID-19 deaths CSV; its fields are separated by semicolons.
file_path = "fallecidos_covid.csv"
covid_data = pd.read_csv(file_path, sep=';')
# Summarise the loaded DataFrame (columns, non-null counts, dtypes).
covid_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 220918 entries, 0 to 220917 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 FECHA_CORTE 220918 non-null int64 1 FECHA_FALLECIMIENTO 220918 non-null int64 2 EDAD_DECLARADA 220918 non-null int64 3 SEXO 220918 non-null object 4 CLASIFICACION_DEF 220918 non-null object 5 DEPARTAMENTO 220918 non-null object 6 PROVINCIA 220913 non-null object 7 DISTRITO 220913 non-null object 8 UBIGEO 220913 non-null float64 9 UUID 218449 non-null float64 dtypes: float64(2), int64(3), object(5) memory usage: 16.9+ MB
# Preview the first rows of the raw data.
covid_data.head()
| FECHA_CORTE | FECHA_FALLECIMIENTO | EDAD_DECLARADA | SEXO | CLASIFICACION_DEF | DEPARTAMENTO | PROVINCIA | DISTRITO | UBIGEO | UUID | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 20240317 | 20220219 | 63 | MASCULINO | Criterio virológico | TUMBES | TUMBES | TUMBES | 240101.0 | 203506.0 |
| 1 | 20240317 | 20210529 | 74 | MASCULINO | Criterio virológico | TUMBES | TUMBES | TUMBES | 240101.0 | 203532.0 |
| 2 | 20240317 | 20210623 | 72 | FEMENINO | Criterio SINADEF | TACNA | TACNA | ALTO DE LA ALIANZA | 230102.0 | 203584.0 |
| 3 | 20240317 | 20210824 | 85 | MASCULINO | Criterio investigación Epidemiológica | TUMBES | TUMBES | TUMBES | 240101.0 | 212541.0 |
| 4 | 20240317 | 20210627 | 46 | MASCULINO | Criterio virológico | TACNA | TACNA | ALTO DE LA ALIANZA | 230102.0 | 203665.0 |
# Count how many records share each death date (stored as a YYYYMMDD integer).
covid_data['FECHA_FALLECIMIENTO'].value_counts()
FECHA_FALLECIMIENTO
20210409 828
20210420 822
20210413 821
20210419 820
20210416 820
...
20230909 1
20231129 1
20230919 1
20240227 1
20231214 1
Name: count, Length: 1397, dtype: int64
# Derive the year of death: take the first four characters of the YYYYMMDD
# value in 'FECHA_FALLECIMIENTO' and convert them to a number (NaN if the
# text cannot be parsed).
covid_data['AÑO_FALLECIMIENTO'] = pd.to_numeric(
    covid_data['FECHA_FALLECIMIENTO'].astype(str).str.slice(0, 4),
    errors='coerce',
)
# Tally the deaths per year, ordered by year.
casos_por_año_filtrado = covid_data['AÑO_FALLECIMIENTO'].value_counts().sort_index()
# Show the yearly totals.
print(casos_por_año_filtrado)
AÑO_FALLECIMIENTO 2020 95331 2021 107962 2022 14742 2023 2570 2024 313 Name: count, dtype: int64
# Count the records under each death-classification criterion.
covid_data.CLASIFICACION_DEF.value_counts()
CLASIFICACION_DEF Criterio virológico 84557 Criterio SINADEF 66198 Criterio serológico 42910 Criterio investigación Epidemiológica 10783 Criterio clínico 8994 Criterio radiológico 5113 Criterio nexo epidemiológico 2215 NotiCovid 132 sinadef 16 Name: count, dtype: int64
Criterio virológico: Muerte en un caso confirmado de COVID-19 que fallece en los 60 días posteriores a una prueba molecular (PCR) o antigénica reactiva para SARS-CoV-2. Criterio serológico: Muerte en un caso confirmado de COVID-19 que fallece en los 60 días posteriores a una prueba serológica positiva IgM o IgM/IgG para SARS-CoV-2. Criterio radiológico: Muerte en un caso probable de COVID-19 que presenta una imagen radiológica, tomográfica o de resonancia magnética nuclear compatible con neumonía COVID-19. Criterio nexo epidemiológico: Muerte en un caso probable de COVID-19 que presenta nexo epidemiológico con un caso confirmado de COVID-19. Criterio investigación epidemiológica: Muerte en un caso sospechoso de COVID-19 que es verificado por investigación epidemiológica de la Red Nacional de Epidemiología (RENACE). Criterio clínico: Muerte en un caso sospechoso de COVID-19 que presenta cuadro clínico compatible con la enfermedad. Criterio SINADEF: Muerte con certificado de defunción en el que se presenta el diagnóstico de COVID-19 como causa de la muerte. El fallecimiento por COVID-19 en el certificado de defunción está definido por la presencia en los campos A, B, C o D de los códigos CIE-10: U071, U072, B342, B972, o la mención de los términos “coronavirus”, “cov-2”, “cov2”, “covid” y “sars”.
# Normalise every spelling of the SINADEF criterion to the canonical label
# 'Criterio SINADEF'. Matching ignores letter case and surrounding whitespace,
# so variants such as 'sinadef' or 'Criterio sinadef' are all caught without
# having to enumerate each exact spelling by hand.
sinadef_variants = ['sinadef', 'criterio sinadef']
es_sinadef = covid_data['CLASIFICACION_DEF'].str.strip().str.lower().isin(sinadef_variants)
covid_data.loc[es_sinadef, 'CLASIFICACION_DEF'] = 'Criterio SINADEF'
# Re-count the categories to confirm the variants were merged.
covid_data['CLASIFICACION_DEF'].value_counts()
CLASIFICACION_DEF Criterio virológico 84557 Criterio SINADEF 66214 Criterio serológico 42910 Criterio investigación Epidemiológica 10783 Criterio clínico 8994 Criterio radiológico 5113 Criterio nexo epidemiológico 2215 NotiCovid 132 Name: count, dtype: int64
# Keep every record except those classified as 'NotiCovid'.
covid_criterios = covid_data.loc[covid_data['CLASIFICACION_DEF'].ne('NotiCovid')]
# Preview the filtered DataFrame.
covid_criterios.head()
| FECHA_CORTE | FECHA_FALLECIMIENTO | EDAD_DECLARADA | SEXO | CLASIFICACION_DEF | DEPARTAMENTO | PROVINCIA | DISTRITO | UBIGEO | UUID | AÑO_FALLECIMIENTO | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 20240317 | 20220219 | 63 | MASCULINO | Criterio virológico | TUMBES | TUMBES | TUMBES | 240101.0 | 203506.0 | 2022 |
| 1 | 20240317 | 20210529 | 74 | MASCULINO | Criterio virológico | TUMBES | TUMBES | TUMBES | 240101.0 | 203532.0 | 2021 |
| 2 | 20240317 | 20210623 | 72 | FEMENINO | Criterio SINADEF | TACNA | TACNA | ALTO DE LA ALIANZA | 230102.0 | 203584.0 | 2021 |
| 3 | 20240317 | 20210824 | 85 | MASCULINO | Criterio investigación Epidemiológica | TUMBES | TUMBES | TUMBES | 240101.0 | 212541.0 | 2021 |
| 4 | 20240317 | 20210627 | 46 | MASCULINO | Criterio virológico | TACNA | TACNA | ALTO DE LA ALIANZA | 230102.0 | 203665.0 | 2021 |
# Grouping keys: year of death, department, province and classification criterion.
indexList=['AÑO_FALLECIMIENTO','DEPARTAMENTO','PROVINCIA','CLASIFICACION_DEF']
# Apply `len` to 'CLASIFICACION_DEF' inside each group, i.e. count the rows per group.
aggregator={'CLASIFICACION_DEF':[len]}
# One row per (year, department, province, criterion) with its record count.
covid_provYear=covid_criterios.groupby(indexList,observed=True).agg(aggregator)
covid_provYear
| CLASIFICACION_DEF | ||||
|---|---|---|---|---|
| len | ||||
| AÑO_FALLECIMIENTO | DEPARTAMENTO | PROVINCIA | CLASIFICACION_DEF | |
| 2020 | AMAZONAS | BAGUA | Criterio SINADEF | 39 |
| Criterio clínico | 13 | |||
| Criterio investigación Epidemiológica | 58 | |||
| Criterio nexo epidemiológico | 19 | |||
| Criterio radiológico | 6 | |||
| ... | ... | ... | ... | ... |
| 2024 | PIURA | SULLANA | Criterio virológico | 1 |
| TALARA | Criterio virológico | 1 | ||
| PUNO | PUNO | Criterio virológico | 1 | |
| SAN ROMAN | Criterio virológico | 2 | ||
| TACNA | TACNA | Criterio SINADEF | 1 |
2869 rows × 1 columns
# Pivot the criterion level (the innermost row-index level) into columns;
# year/department/province combinations lacking a criterion get a count of 0.
covidDraft=covid_provYear.unstack(level='CLASIFICACION_DEF').fillna(0)
covidDraft
| CLASIFICACION_DEF | |||||||||
|---|---|---|---|---|---|---|---|---|---|
| len | |||||||||
| CLASIFICACION_DEF | Criterio SINADEF | Criterio clínico | Criterio investigación Epidemiológica | Criterio nexo epidemiológico | Criterio radiológico | Criterio serológico | Criterio virológico | ||
| AÑO_FALLECIMIENTO | DEPARTAMENTO | PROVINCIA | |||||||
| 2020 | AMAZONAS | BAGUA | 39.0 | 13.0 | 58.0 | 19.0 | 6.0 | 109.0 | 26.0 |
| BONGARA | 3.0 | 1.0 | 1.0 | 1.0 | 0.0 | 15.0 | 4.0 | ||
| CHACHAPOYAS | 16.0 | 3.0 | 1.0 | 0.0 | 4.0 | 30.0 | 7.0 | ||
| CONDORCANQUI | 17.0 | 17.0 | 24.0 | 3.0 | 0.0 | 18.0 | 0.0 | ||
| LUYA | 6.0 | 1.0 | 2.0 | 1.0 | 1.0 | 9.0 | 0.0 | ||
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2024 | PIURA | SULLANA | 3.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| TALARA | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | ||
| PUNO | PUNO | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | |
| SAN ROMAN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | ||
| TACNA | TACNA | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | |
768 rows × 7 columns
# Share of 'Criterio clínico' deaths relative to the combined
# 'Criterio SINADEF' + 'Criterio clínico' deaths, per year/department/province.
# The two count columns are selected by label rather than by position, so the
# ratio does not silently change meaning if the set or order of criterion
# columns changes.
sinadef_count = covidDraft[('CLASIFICACION_DEF', 'len', 'Criterio SINADEF')]
clinico_count = covidDraft[('CLASIFICACION_DEF', 'len', 'Criterio clínico')]
covidDraft['CRITERIO_pct'] = clinico_count / (sinadef_count + clinico_count)
# Reshape to one row per (department, province) and one column per year.
# NOTE: a 0/0 ratio (no SINADEF or clinical deaths) and a province-year absent
# from the data both end up as 0 after fillna.
covid_provYear_Alarm_w=covidDraft['CRITERIO_pct'].unstack('AÑO_FALLECIMIENTO').fillna(0)
covid_provYear_Alarm_w
| AÑO_FALLECIMIENTO | 2020 | 2021 | 2022 | 2023 | 2024 | |
|---|---|---|---|---|---|---|
| DEPARTAMENTO | PROVINCIA | |||||
| AMAZONAS | BAGUA | 0.250000 | 0.153846 | 0.052632 | 0.0 | 0.0 |
| BONGARA | 0.250000 | 0.166667 | 0.000000 | 0.0 | 0.0 | |
| CHACHAPOYAS | 0.157895 | 0.161290 | 0.285714 | 0.0 | 0.0 | |
| CONDORCANQUI | 0.500000 | 0.250000 | 0.000000 | 0.0 | 0.0 | |
| LUYA | 0.142857 | 0.153846 | 0.000000 | 0.0 | 0.0 | |
| ... | ... | ... | ... | ... | ... | ... |
| TUMBES | ZARUMILLA | 0.306122 | 0.156250 | 0.000000 | 0.0 | 0.0 |
| UCAYALI | ATALAYA | 0.562500 | 0.210526 | 0.000000 | 0.0 | 0.0 |
| CORONEL PORTILLO | 0.283002 | 0.374046 | 0.000000 | 0.0 | 0.0 | |
| PADRE ABAD | 0.441860 | 0.388889 | 0.333333 | 0.0 | 0.0 | |
| PURUS | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 |
196 rows × 5 columns
# Inspect the column labels (the years) of the wide table.
covid_provYear_Alarm_w.columns
Index([2020, 2021, 2022, 2023, 2024], dtype='int64', name='AÑO_FALLECIMIENTO')
# Prefix each year label with 'Año' so the column names become plain strings.
covid_provYear_Alarm_w.columns=[f'Año{year}' for year in covid_provYear_Alarm_w.columns]
covid_provYear_Alarm_w
| Año2020 | Año2021 | Año2022 | Año2023 | Año2024 | ||
|---|---|---|---|---|---|---|
| DEPARTAMENTO | PROVINCIA | |||||
| AMAZONAS | BAGUA | 0.250000 | 0.153846 | 0.052632 | 0.0 | 0.0 |
| BONGARA | 0.250000 | 0.166667 | 0.000000 | 0.0 | 0.0 | |
| CHACHAPOYAS | 0.157895 | 0.161290 | 0.285714 | 0.0 | 0.0 | |
| CONDORCANQUI | 0.500000 | 0.250000 | 0.000000 | 0.0 | 0.0 | |
| LUYA | 0.142857 | 0.153846 | 0.000000 | 0.0 | 0.0 | |
| ... | ... | ... | ... | ... | ... | ... |
| TUMBES | ZARUMILLA | 0.306122 | 0.156250 | 0.000000 | 0.0 | 0.0 |
| UCAYALI | ATALAYA | 0.562500 | 0.210526 | 0.000000 | 0.0 | 0.0 |
| CORONEL PORTILLO | 0.283002 | 0.374046 | 0.000000 | 0.0 | 0.0 | |
| PADRE ABAD | 0.441860 | 0.388889 | 0.333333 | 0.0 | 0.0 | |
| PURUS | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 |
196 rows × 5 columns
# Move DEPARTAMENTO and PROVINCIA from the row index back into regular columns.
covid_provYear_Alarm_w = covid_provYear_Alarm_w.reset_index()
covid_provYear_Alarm_w
| DEPARTAMENTO | PROVINCIA | Año2020 | Año2021 | Año2022 | Año2023 | Año2024 | |
|---|---|---|---|---|---|---|---|
| 0 | AMAZONAS | BAGUA | 0.250000 | 0.153846 | 0.052632 | 0.0 | 0.0 |
| 1 | AMAZONAS | BONGARA | 0.250000 | 0.166667 | 0.000000 | 0.0 | 0.0 |
| 2 | AMAZONAS | CHACHAPOYAS | 0.157895 | 0.161290 | 0.285714 | 0.0 | 0.0 |
| 3 | AMAZONAS | CONDORCANQUI | 0.500000 | 0.250000 | 0.000000 | 0.0 | 0.0 |
| 4 | AMAZONAS | LUYA | 0.142857 | 0.153846 | 0.000000 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 191 | TUMBES | ZARUMILLA | 0.306122 | 0.156250 | 0.000000 | 0.0 | 0.0 |
| 192 | UCAYALI | ATALAYA | 0.562500 | 0.210526 | 0.000000 | 0.0 | 0.0 |
| 193 | UCAYALI | CORONEL PORTILLO | 0.283002 | 0.374046 | 0.000000 | 0.0 | 0.0 |
| 194 | UCAYALI | PADRE ABAD | 0.441860 | 0.388889 | 0.333333 | 0.0 | 0.0 |
| 195 | UCAYALI | PURUS | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 |
196 rows × 7 columns
# URL of the zipped province-level map.
mapLink='https://github.com/SocialAnalytics-StrategicIntelligence/GeoDF_Analytics/raw/main/maps/ProvsINEI2023.zip'
import geopandas as gpd
# Read the map directly from the URL.
provmap=gpd.read_file(mapLink)
# Summarise its columns and dtypes.
provmap.info()
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 196 entries, 0 to 195 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 OBJECTID 196 non-null float64 1 CCDD 196 non-null object 2 CCPP 196 non-null object 3 DEPARTAMEN 196 non-null object 4 PROVINCIA 196 non-null object 5 geometry 196 non-null geometry dtypes: float64(1), geometry(1), object(4) memory usage: 9.3+ KB
# Build a 'DEPARTMENT+PROVINCE' key used to match map polygons with the COVID
# table. The columns are addressed by name and concatenated with vectorised
# string operations instead of slicing by position and joining row by row.
provmap['location']=provmap['DEPARTAMEN'] + '+' + provmap['PROVINCIA']
provmap.head(10)
| OBJECTID | CCDD | CCPP | DEPARTAMEN | PROVINCIA | geometry | location | |
|---|---|---|---|---|---|---|---|
| 0 | 1.0 | 01 | 01 | AMAZONAS | CHACHAPOYAS | POLYGON ((-77.72614 -5.94354, -77.72486 -5.943... | AMAZONAS+CHACHAPOYAS |
| 1 | 2.0 | 01 | 02 | AMAZONAS | BAGUA | POLYGON ((-78.61909 -4.51001, -78.61802 -4.510... | AMAZONAS+BAGUA |
| 2 | 3.0 | 01 | 03 | AMAZONAS | BONGARA | POLYGON ((-77.72759 -5.14030, -77.72361 -5.140... | AMAZONAS+BONGARA |
| 3 | 4.0 | 01 | 04 | AMAZONAS | CONDORCANQUI | POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... | AMAZONAS+CONDORCANQUI |
| 4 | 5.0 | 01 | 05 | AMAZONAS | LUYA | POLYGON ((-78.13023 -5.90370, -78.13011 -5.904... | AMAZONAS+LUYA |
| 5 | 6.0 | 01 | 06 | AMAZONAS | RODRIGUEZ DE MENDOZA | POLYGON ((-77.44452 -6.05002, -77.44387 -6.050... | AMAZONAS+RODRIGUEZ DE MENDOZA |
| 6 | 7.0 | 01 | 07 | AMAZONAS | UTCUBAMBA | POLYGON ((-78.09288 -5.36258, -78.09288 -5.364... | AMAZONAS+UTCUBAMBA |
| 7 | 8.0 | 02 | 01 | ANCASH | HUARAZ | POLYGON ((-77.39870 -9.35563, -77.39852 -9.356... | ANCASH+HUARAZ |
| 8 | 9.0 | 02 | 02 | ANCASH | AIJA | POLYGON ((-77.61368 -9.64900, -77.61241 -9.649... | ANCASH+AIJA |
| 9 | 10.0 | 02 | 03 | ANCASH | ANTONIO RAYMONDI | POLYGON ((-77.08856 -8.97496, -77.08804 -8.975... | ANCASH+ANTONIO RAYMONDI |
# Build the same 'DEPARTMENT+PROVINCE' key on the COVID table. The columns are
# addressed by name and concatenated with vectorised string operations instead
# of slicing the first two columns by position and joining row by row.
covid_provYear_Alarm_w['location']=covid_provYear_Alarm_w['DEPARTAMENTO'] + '+' + covid_provYear_Alarm_w['PROVINCIA']
covid_provYear_Alarm_w.head()
| DEPARTAMENTO | PROVINCIA | Año2020 | Año2021 | Año2022 | Año2023 | Año2024 | location | |
|---|---|---|---|---|---|---|---|---|
| 0 | AMAZONAS | BAGUA | 0.250000 | 0.153846 | 0.052632 | 0.0 | 0.0 | AMAZONAS+BAGUA |
| 1 | AMAZONAS | BONGARA | 0.250000 | 0.166667 | 0.000000 | 0.0 | 0.0 | AMAZONAS+BONGARA |
| 2 | AMAZONAS | CHACHAPOYAS | 0.157895 | 0.161290 | 0.285714 | 0.0 | 0.0 | AMAZONAS+CHACHAPOYAS |
| 3 | AMAZONAS | CONDORCANQUI | 0.500000 | 0.250000 | 0.000000 | 0.0 | 0.0 | AMAZONAS+CONDORCANQUI |
| 4 | AMAZONAS | LUYA | 0.142857 | 0.153846 | 0.000000 | 0.0 | 0.0 | AMAZONAS+LUYA |
import unidecode


def byePunctuation(x):
    """Return *x* transliterated to plain ASCII via unidecode (e.g. 'Ñ' becomes 'N')."""
    return unidecode.unidecode(x)


# Transliterate the keys on both sides so accented and unaccented spellings match.
covid_provYear_Alarm_w['location']=covid_provYear_Alarm_w['location'].apply(byePunctuation)
provmap['location']=provmap['location'].apply(byePunctuation)
# Remove dashes, underscores and whitespace altogether (they are deleted, not
# replaced by a space) so spacing/punctuation variants collapse to one key.
# The pattern is a raw string: a plain "\-|\_|\s+" literal contains invalid
# escape sequences, which raise SyntaxWarning on recent Python versions.
covid_provYear_Alarm_w['location']=covid_provYear_Alarm_w.location.str.replace(r"[-_\s]+","",regex=True)
provmap['location']=provmap.location.str.replace(r"[-_\s]+","",regex=True)
<>:2: SyntaxWarning: invalid escape sequence '\-'
<>:3: SyntaxWarning: invalid escape sequence '\-'
<>:2: SyntaxWarning: invalid escape sequence '\-'
<>:3: SyntaxWarning: invalid escape sequence '\-'
C:\Users\Sistema\AppData\Local\Temp\ipykernel_51764\1873358278.py:2: SyntaxWarning: invalid escape sequence '\-'
covid_provYear_Alarm_w['location']=covid_provYear_Alarm_w.location.str.replace("\-|\_|\s+","",regex=True)
C:\Users\Sistema\AppData\Local\Temp\ipykernel_51764\1873358278.py:3: SyntaxWarning: invalid escape sequence '\-'
provmap['location']=provmap.location.str.replace("\-|\_|\s+","",regex=True)
# Keys present in the COVID table but absent from the map, and vice versa.
nomatch_df=set(covid_provYear_Alarm_w['location']).difference(provmap['location'])
nomatch_gdf=set(provmap['location']).difference(covid_provYear_Alarm_w['location'])
# Number of unmatched keys on each side.
len(nomatch_df), len(nomatch_gdf)
(2, 2)
from thefuzz import process
# For each unmatched key of the COVID table, show the closest unmatched map key
# together with the score returned by extractOne.
[(dis,process.extractOne(dis,nomatch_gdf)) for dis in sorted(nomatch_df)]
[('ANCASH+ANTONIORAIMONDI', ('ANCASH+ANTONIORAYMONDI', 95)),
('ICA+NAZCA', ('ICA+NASCA', 89))]
# Preview the replacement mapping: unmatched COVID-table key -> best-matching map key.
{dis:process.extractOne(dis,nomatch_gdf)[0] for dis in sorted(nomatch_df)}
{'ANCASH+ANTONIORAIMONDI': 'ANCASH+ANTONIORAYMONDI', 'ICA+NAZCA': 'ICA+NASCA'}
# Map each unmatched COVID-table key to its best fuzzy match among the unmatched map keys.
changesinDF={
    dis: process.extractOne(dis,nomatch_gdf)[0]
    for dis in sorted(nomatch_df)
}
# Rewrite the 'location' keys of the COVID table using that mapping.
covid_provYear_Alarm_w['location']=covid_provYear_Alarm_w['location'].replace(changesinDF)
# Recompute the unmatched keys on both sides; the final list should now be empty.
nomatch_df=set(covid_provYear_Alarm_w['location']).difference(provmap['location'])
nomatch_gdf=set(provmap['location']).difference(covid_provYear_Alarm_w['location'])
[(dis,process.extractOne(dis,nomatch_gdf)) for dis in sorted(nomatch_df)]
[]
# `provmap_df` is the attribute table of the map without its geometry column.
# It has to be built here because nothing earlier defines it; the geometry is
# attached again in the following step.
provmap_df = pd.DataFrame(provmap.drop(columns='geometry'))
# Left-merge the COVID shares onto the map attributes by 'location'; the merge
# indicator is stored under the custom name 'merge_flag'.
covid_provYear_Alarm_map = provmap_df.merge(covid_provYear_Alarm_w, on='location', how='left', indicator='merge_flag')
# Inspect the merged table. info() prints its report itself and returns None,
# so it is not wrapped in print() (that would print a stray "None").
covid_provYear_Alarm_map.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 196 entries, 0 to 195 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 OBJECTID 196 non-null float64 1 CCDD 196 non-null object 2 CCPP 196 non-null object 3 DEPARTAMEN 196 non-null object 4 PROVINCIA_x 196 non-null object 5 location 196 non-null object 6 DEPARTAMENTO 196 non-null object 7 PROVINCIA_y 196 non-null object 8 Año2020 196 non-null float64 9 Año2021 196 non-null float64 10 Año2022 196 non-null float64 11 Año2023 196 non-null float64 12 Año2024 196 non-null float64 13 merge_flag 196 non-null category dtypes: category(1), float64(6), object(7) memory usage: 20.4+ KB None
import geopandas as gpd
# Make sure provmap is a GeoDataFrame whose active geometry column is 'geometry'.
provmap = gpd.GeoDataFrame(provmap, geometry='geometry')
# Merge on 'location' to bring the geometry column into the merged table.
covid_provYear_Alarm_map = covid_provYear_Alarm_map.merge(provmap[['location', 'geometry']], on='location', how='left')
# Convert the merged table to a GeoDataFrame.
covid_provYear_Alarm_map = gpd.GeoDataFrame(covid_provYear_Alarm_map, geometry='geometry')
print(covid_provYear_Alarm_map.head())
OBJECTID CCDD CCPP DEPARTAMEN PROVINCIA_x location \ 0 1.0 01 01 AMAZONAS CHACHAPOYAS AMAZONAS+CHACHAPOYAS 1 2.0 01 02 AMAZONAS BAGUA AMAZONAS+BAGUA 2 3.0 01 03 AMAZONAS BONGARA AMAZONAS+BONGARA 3 4.0 01 04 AMAZONAS CONDORCANQUI AMAZONAS+CONDORCANQUI 4 5.0 01 05 AMAZONAS LUYA AMAZONAS+LUYA DEPARTAMENTO PROVINCIA_y Año2020 Año2021 Año2022 Año2023 Año2024 \ 0 AMAZONAS CHACHAPOYAS 0.157895 0.161290 0.285714 0.0 0.0 1 AMAZONAS BAGUA 0.250000 0.153846 0.052632 0.0 0.0 2 AMAZONAS BONGARA 0.250000 0.166667 0.000000 0.0 0.0 3 AMAZONAS CONDORCANQUI 0.500000 0.250000 0.000000 0.0 0.0 4 AMAZONAS LUYA 0.142857 0.153846 0.000000 0.0 0.0 merge_flag geometry 0 both POLYGON ((-77.72614 -5.94354, -77.72486 -5.943... 1 both POLYGON ((-78.61909 -4.51001, -78.61802 -4.510... 2 both POLYGON ((-77.72759 -5.14030, -77.72361 -5.140... 3 both POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... 4 both POLYGON ((-78.13023 -5.90370, -78.13011 -5.904...
# Store the merge indicator as plain strings.
covid_provYear_Alarm_map['merge_flag'] = covid_provYear_Alarm_map['merge_flag'].astype(str)
# Single 'PROVINCIA' column: take 'PROVINCIA_x' and fall back to 'PROVINCIA_y'
# wherever 'PROVINCIA_x' is missing.
covid_provYear_Alarm_map['PROVINCIA'] = (
    covid_provYear_Alarm_map['PROVINCIA_x']
    .combine_first(covid_provYear_Alarm_map['PROVINCIA_y'])
)
# The two suffixed columns are no longer needed.
covid_provYear_Alarm_map = covid_provYear_Alarm_map.drop(columns=['PROVINCIA_x', 'PROVINCIA_y'])
# Check the result.
print(covid_provYear_Alarm_map.head())
OBJECTID CCDD CCPP DEPARTAMEN location DEPARTAMENTO \
0 1.0 01 01 AMAZONAS AMAZONAS+CHACHAPOYAS AMAZONAS
1 2.0 01 02 AMAZONAS AMAZONAS+BAGUA AMAZONAS
2 3.0 01 03 AMAZONAS AMAZONAS+BONGARA AMAZONAS
3 4.0 01 04 AMAZONAS AMAZONAS+CONDORCANQUI AMAZONAS
4 5.0 01 05 AMAZONAS AMAZONAS+LUYA AMAZONAS
Año2020 Año2021 Año2022 Año2023 Año2024 merge_flag \
0 0.157895 0.161290 0.285714 0.0 0.0 both
1 0.250000 0.153846 0.052632 0.0 0.0 both
2 0.250000 0.166667 0.000000 0.0 0.0 both
3 0.500000 0.250000 0.000000 0.0 0.0 both
4 0.142857 0.153846 0.000000 0.0 0.0 both
geometry PROVINCIA
0 POLYGON ((-77.72614 -5.94354, -77.72486 -5.943... CHACHAPOYAS
1 POLYGON ((-78.61909 -4.51001, -78.61802 -4.510... BAGUA
2 POLYGON ((-77.72759 -5.14030, -77.72361 -5.140... BONGARA
3 POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... CONDORCANQUI
4 POLYGON ((-78.13023 -5.90370, -78.13011 -5.904... LUYA
# Summarise the merged GeoDataFrame. info() prints its report itself and
# returns None, so wrapping it in print() would only add a stray "None".
covid_provYear_Alarm_map.info()
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 196 entries, 0 to 195 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 OBJECTID 196 non-null float64 1 CCDD 196 non-null object 2 CCPP 196 non-null object 3 DEPARTAMEN 196 non-null object 4 location 196 non-null object 5 DEPARTAMENTO 196 non-null object 6 Año2020 196 non-null float64 7 Año2021 196 non-null float64 8 Año2022 196 non-null float64 9 Año2023 196 non-null float64 10 Año2024 196 non-null float64 11 merge_flag 196 non-null object 12 geometry 196 non-null geometry 13 PROVINCIA 196 non-null object dtypes: float64(6), geometry(1), object(7) memory usage: 21.6+ KB None
# Columns to remove, restricted to the names actually present in the table so
# the drop does not fail if some of them are already gone.
bye = [
    col
    for col in ('merge_flag', 'CCPP', 'CCDD', 'DEPARTAMENTO')
    if col in covid_provYear_Alarm_map.columns
]
# Remove those columns.
covid_provYear_Alarm_map = covid_provYear_Alarm_map.drop(columns=bye)
# Show the first rows to confirm.
covid_provYear_Alarm_map.head()
| OBJECTID | DEPARTAMEN | location | Año2020 | Año2021 | Año2022 | Año2023 | Año2024 | geometry | PROVINCIA | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | AMAZONAS | AMAZONAS+CHACHAPOYAS | 0.157895 | 0.161290 | 0.285714 | 0.0 | 0.0 | POLYGON ((-77.72614 -5.94354, -77.72486 -5.943... | CHACHAPOYAS |
| 1 | 2.0 | AMAZONAS | AMAZONAS+BAGUA | 0.250000 | 0.153846 | 0.052632 | 0.0 | 0.0 | POLYGON ((-78.61909 -4.51001, -78.61802 -4.510... | BAGUA |
| 2 | 3.0 | AMAZONAS | AMAZONAS+BONGARA | 0.250000 | 0.166667 | 0.000000 | 0.0 | 0.0 | POLYGON ((-77.72759 -5.14030, -77.72361 -5.140... | BONGARA |
| 3 | 4.0 | AMAZONAS | AMAZONAS+CONDORCANQUI | 0.500000 | 0.250000 | 0.000000 | 0.0 | 0.0 | POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... | CONDORCANQUI |
| 4 | 5.0 | AMAZONAS | AMAZONAS+LUYA | 0.142857 | 0.153846 | 0.000000 | 0.0 | 0.0 | POLYGON ((-78.13023 -5.90370, -78.13011 -5.904... | LUYA |
# Replace any remaining missing values with 0.
covid_provYear_Alarm_map.fillna(0,inplace=True)
# Path to the .shp file (adjust the path if necessary).
# NOTE(review): this is an absolute path on one user's machine; the notebook
# cannot run elsewhere without editing it.
shp_file_path = r"C:\Users\Sistema\OneDrive\Documentos\PUCP\2024-2\Herramientas cuantitativas\Tarea 4\GeoData Covid\GeoData-Covid-\maps\gadm41_PER_2.shp"
# Load the shapefile.
provincias_peru = gpd.read_file(shp_file_path)
# Split multi-part geometries into individual parts (index labels repeat for the parts of one feature).
provincias_peru = provincias_peru.explode(index_parts=False)
# Show the first rows to verify the load.
print(provincias_peru.head())
GID_2 GID_0 COUNTRY GID_1 NAME_1 NL_NAME_1 NAME_2 \
0 PER.1.1_1 PER Peru PER.1_1 Amazonas NA Bagua
0 PER.1.1_1 PER Peru PER.1_1 Amazonas NA Bagua
1 PER.1.2_1 PER Peru PER.1_1 Amazonas NA Bongará
2 PER.1.3_1 PER Peru PER.1_1 Amazonas NA Chachapoyas
3 PER.1.4_1 PER Peru PER.1_1 Amazonas NA Condorcanqui
VARNAME_2 NL_NAME_2 TYPE_2 ENGTYPE_2 CC_2 HASC_2 \
0 NA NA Provincia Province NA PE.AM.BG
0 NA NA Provincia Province NA PE.AM.BG
1 Bongart NA Provincia Province NA PE.AM.BN
2 NA NA Provincia Province NA PE.AM.CP
3 NA NA Provincia Province NA PE.AM.CQ
geometry
0 POLYGON ((-78.29144 -5.55342, -78.28999 -5.556...
0 POLYGON ((-78.63635 -4.49835, -78.63632 -4.498...
1 POLYGON ((-77.76215 -5.81584, -77.76093 -5.816...
2 POLYGON ((-77.78199 -6.94651, -77.78417 -6.950...
3 POLYGON ((-78.10184 -5.34889, -78.10178 -5.348...
# Write the exploded province polygons to a GeoPackage in the working directory.
provincias_peru.to_file("provinciasPeru.gpkg", layer='provincias', driver="GPKG")
import os
# Make sure the 'maps' folder exists before writing into it.
os.makedirs('maps', exist_ok=True)
# NOTE(review): this writes to maps/provinciasPeru.gpkg, a different file from
# the one written above — confirm the two layers are not meant to share one GeoPackage.
covid_provYear_Alarm_map.to_file(os.path.join('maps', "provinciasPeru.gpkg"), layer='provinciasCovid', driver="GPKG")
# Summary statistics of the 2020 share.
covid_provYear_Alarm_map.Año2020.describe()
count 196.000000 mean 0.206520 std 0.164606 min 0.000000 25% 0.098810 50% 0.182458 75% 0.285714 max 1.000000 Name: Año2020, dtype: float64
import seaborn as sea
# Horizontal boxplot of the 2020 share to inspect its distribution.
sea.boxplot(covid_provYear_Alarm_map.Año2020, color='skyblue',orient='h')
<Axes: xlabel='Año2020'>
from sklearn.preprocessing import QuantileTransformer
# Quantile-transform the 2020 share towards a normal output distribution,
# using 100 quantiles and a fixed random state.
qt = QuantileTransformer(
    output_distribution='normal',
    n_quantiles=100,
    random_state=0,
)
qt_result=qt.fit_transform(covid_provYear_Alarm_map[['Año2020']])
# Horizontal boxplot of the transformed values.
sea.boxplot(qt_result, color='skyblue',orient='h')
<Axes: >
# Store the transformed 2020 values as a new column.
covid_provYear_Alarm_map['Año_2020_qt']=qt_result
from libpysal.weights import Queen, Rook, KNN
# Rook contiguity weights for all provinces.
w_rook = Rook.from_dataframe(covid_provYear_Alarm_map,use_index=False)
# Queen contiguity weights for all provinces.
w_queen = Queen.from_dataframe(covid_provYear_Alarm_map,use_index=False)
# Keep only the provinces that belong to the LIMA department.
covid_provYear_Alarm_map_lima = covid_provYear_Alarm_map.loc[
    covid_provYear_Alarm_map['DEPARTAMEN'].eq('LIMA')
]
# Check the Lima subset.
print(covid_provYear_Alarm_map_lima.head())
OBJECTID DEPARTAMEN location Año2020 Año2021 Año2022 \
127 128.0 LIMA LIMA+LIMA 0.093098 0.049750 0.037594
128 129.0 LIMA LIMA+BARRANCA 0.201754 0.083333 0.000000
129 130.0 LIMA LIMA+CAJATAMBO 0.941176 0.000000 0.000000
130 131.0 LIMA LIMA+CANTA 0.083333 0.083333 0.000000
131 132.0 LIMA LIMA+CANETE 0.235119 0.051429 0.034483
Año2023 Año2024 geometry \
127 0.027027 0.0 MULTIPOLYGON (((-77.06517 -11.57512, -77.06505...
128 0.000000 0.0 POLYGON ((-77.73536 -10.32351, -77.73407 -10.3...
129 0.000000 0.0 POLYGON ((-76.90483 -10.27726, -76.90451 -10.2...
130 0.000000 0.0 POLYGON ((-76.56497 -11.31924, -76.56074 -11.3...
131 0.000000 0.0 POLYGON ((-76.55171 -12.28024, -76.55021 -12.2...
PROVINCIA Año_2021_qt Año_2020_qt
127 LIMA -0.772468 -0.732259
128 BARRANCA -0.321971 0.143629
129 CAJATAMBO -5.199338 2.843409
130 CANTA -0.321971 -0.834427
131 CAÑETE -0.709378 0.267958
from libpysal.weights import Queen, Rook, KNN
# Queen contiguity weights for the Lima subset.
w_queen_lima = Queen.from_dataframe(covid_provYear_Alarm_map_lima, use_index=False)
# Rook contiguity weights for the Lima subset.
w_rook_lima = Rook.from_dataframe(covid_provYear_Alarm_map_lima, use_index=False)
# K-nearest-neighbour weights (k=8) for the Lima subset.
w_knn_lima = KNN.from_dataframe(covid_provYear_Alarm_map_lima, k=8)
# Print the weights objects (this shows only their default object representation).
print(w_queen_lima)
print(w_rook_lima)
print(w_knn_lima)
<libpysal.weights.contiguity.Queen object at 0x0000017757415730> <libpysal.weights.contiguity.Rook object at 0x000001774FD463C0> <libpysal.weights.distance.KNN object at 0x0000017735C99E50>
# Plot the province named "LIMA" as the base layer.
base = covid_provYear_Alarm_map_lima[covid_provYear_Alarm_map_lima.PROVINCIA == "LIMA"].plot()
# Draw in yellow the Rook neighbours of the first province (position 0) of the Lima subset.
covid_provYear_Alarm_map_lima.iloc[w_rook_lima.neighbors[0],].plot(ax=base, facecolor="yellow", edgecolor='k')
# Draw the first province of the subset in red on top.
covid_provYear_Alarm_map_lima.head(1).plot(ax=base, facecolor="red")
<Axes: >
# Show the Queen neighbour lists: province position -> positions of its neighbours.
w_queen.neighbors
{0: [2, 114, 4, 5, 180, 182, 55],
1: [3, 60, 61, 6],
2: [0, 143, 3, 4, 6, 182, 175],
3: [1, 2, 6, 143],
4: [0, 2, 6, 55, 56],
5: [0, 178, 180, 182, 175],
6: [1, 2, 3, 4, 56, 58, 60],
7: [16, 17, 23, 8, 26, 12, 14],
8: [17, 7, 23],
9: [16, 90, 13],
10: [16, 26, 12, 13],
11: [96, 129, 128, 16, 17, 20, 23, 89, 91],
12: [16, 26, 10, 7],
13: [16, 19, 26, 90, 9, 10],
14: [24, 17, 26, 7],
15: [24, 25, 18, 21],
16: [7, 9, 10, 11, 12, 13, 23, 90, 91],
17: [128, 23, 7, 8, 11, 14],
18: [22, 24, 25, 26, 15],
19: [13, 26, 22, 90, 93],
20: [128, 129, 11, 134],
21: [119, 24, 121, 123, 25, 15],
22: [18, 19, 119, 25, 26, 93],
23: [16, 17, 7, 8, 11],
24: [18, 21, 121, 26, 123, 14, 15],
25: [18, 21, 22, 119, 15],
26: [7, 10, 12, 13, 14, 18, 19, 22, 24],
27: [33, 69, 75, 28, 29, 30, 31],
28: [32, 75, 46, 47, 48, 50, 52, 27, 30],
29: [48, 33, 73, 27, 31, 30, 41],
30: [48, 27, 28, 29],
31: [33, 69, 73, 27, 76, 29],
32: [42, 52, 28, 46],
33: [27, 29, 31],
34: [35, 149, 38, 40, 172],
35: [34, 36, 37, 38, 39, 40],
36: [48, 49, 35, 100, 39, 41, 47],
37: [35, 38, 39],
38: [34, 35, 37, 39, 168, 73, 74, 172],
39: [35, 36, 37, 38, 73, 74, 41],
40: [34, 35, 148, 149, 150],
41: [48, 49, 36, 39, 73, 29],
42: [32, 82, 52, 85, 43, 45, 46],
43: [42, 51, 52, 85],
44: [51, 85, 47],
45: [42, 75, 108, 46, 81, 82, 84, 86],
46: [32, 42, 75, 28, 45],
47: [36, 100, 101, 44, 48, 50, 51, 85, 28],
48: [49, 36, 41, 28, 29, 30, 47],
49: [48, 41, 36],
50: [51, 52, 28, 47],
51: [50, 52, 85, 43, 44, 47],
52: [32, 50, 51, 42, 43, 28],
53: [64, 54, 55, 57, 122, 59, 62],
54: [114, 53, 117, 120, 122, 62],
55: [0, 114, 4, 53, 56, 59, 62],
56: [65, 4, 6, 55, 58, 59, 124, 125],
57: [64, 113, 115, 53, 118, 122, 63],
58: [56, 60, 125, 6],
59: [64, 65, 53, 55, 56, 63],
60: [1, 125, 6, 58, 156, 61, 126],
61: [1, 156, 60],
62: [114, 53, 54, 55],
63: [64, 65, 115, 57, 59, 124],
64: [57, 59, 53, 63],
65: [56, 59, 124, 63],
66: [127],
67: [69, 70, 76, 78, 79],
68: [71, 72, 73, 76, 78],
69: [67, 75, 27, 76, 79, 31],
70: [146, 67, 75, 77, 78, 79],
71: [169, 68, 72, 73, 74],
72: [68, 164, 71, 169, 78],
73: [68, 38, 39, 71, 41, 74, 76, 29, 31],
74: [169, 38, 39, 168, 73, 71],
75: [193, 69, 70, 108, 45, 46, 79, 146, 27, 28],
76: [67, 68, 69, 73, 78, 31],
77: [78, 146, 70],
78: [67, 68, 164, 70, 72, 76, 77, 146],
79: [75, 67, 69, 70],
80: [103, 136, 81, 82, 83, 84, 85, 86],
81: [80, 82, 84, 45],
82: [80, 81, 85, 42, 45],
83: [80, 99, 85, 102, 136],
84: [80, 81, 45, 86],
85: [98, 101, 102, 42, 43, 44, 47, 80, 82, 51, 83],
86: [80, 84, 103, 108, 45],
87: [96, 97, 88, 89, 92, 94],
88: [96, 87, 151, 152, 94],
89: [96, 97, 87, 11, 91, 92],
90: [16, 19, 93, 9, 91, 92, 13],
91: [16, 89, 90, 11, 92],
92: [194, 142, 176, 87, 184, 89, 90, 91, 93, 94, 95],
93: [19, 22, 119, 184, 90, 92],
94: [87, 151, 153, 88, 92, 95],
95: [192, 194, 153, 92, 94],
96: [129, 97, 135, 11, 87, 88, 89, 152],
97: [96, 89, 87],
98: [101, 100, 85, 102],
99: [136, 83, 131, 102],
100: [98, 36, 101, 47],
101: [98, 100, 85, 47],
102: [83, 98, 99, 85],
103: [80, 86, 136, 108, 104, 111],
104: [103, 136, 106, 108, 111],
105: [153, 106, 107, 108, 109],
106: [108, 133, 136, 105, 104, 109, 110],
107: [153, 151, 105, 109, 110],
108: [193, 103, 104, 105, 106, 75, 45, 86, 153],
109: [105, 106, 107, 110],
110: [130, 132, 133, 151, 106, 107, 109],
111: [136, 104, 103],
112: [113, 123, 116, 117],
113: [112, 117, 118, 57, 122],
114: [0, 180, 55, 54, 119, 120, 62],
115: [57, 124, 118, 63],
116: [112, 121, 123, 117],
117: [112, 113, 116, 54, 120, 121, 122],
118: [113, 115, 57],
119: [114, 180, 21, 22, 184, 121, 120, 93, 25],
120: [114, 117, 54, 119, 121],
121: [116, 21, 117, 119, 24, 123, 120],
122: [113, 117, 53, 54, 57],
123: [112, 116, 21, 24, 121],
124: [65, 115, 56, 125, 126, 63],
125: [60, 56, 58, 124, 126],
126: [124, 161, 156, 157, 154, 60, 125],
127: [66, 130, 132, 133, 131],
128: [17, 11, 20, 134],
129: [96, 20, 134, 135, 11],
130: [132, 133, 110, 127],
131: [136, 99, 133, 127],
132: [130, 134, 151, 110, 127],
133: [130, 131, 136, 106, 110, 127],
134: [128, 129, 132, 20, 135, 151],
135: [96, 129, 134, 151, 152],
136: [99, 131, 133, 103, 104, 106, 111, 80, 83],
137: [144, 139, 140, 141],
138: [143, 179, 183, 139, 141, 175],
139: [137, 138, 141, 143],
140: [144, 137, 141],
141: [192, 183, 137, 138, 139, 140, 142],
142: [192, 176, 194, 181, 183, 92, 141],
143: [2, 3, 138, 139, 175],
144: [137, 140],
145: [193, 146, 147, 164, 173],
146: [193, 145, 164, 70, 75, 77, 78],
147: [145, 195, 193],
148: [162, 149, 166, 150, 40, 186, 187],
149: [34, 162, 148, 40, 172],
150: [40, 187, 148],
151: [132, 134, 135, 107, 110, 88, 153, 152, 94],
152: [96, 151, 88, 135],
153: [192, 193, 105, 107, 108, 151, 94, 95],
154: [161, 126, 155, 157, 158, 159],
155: [154, 156, 157, 159],
156: [157, 155, 60, 61, 126],
157: [154, 155, 156, 126],
158: [160, 161, 154, 159],
159: [160, 158, 154, 155, 189, 190],
160: [158, 190, 159],
161: [154, 126, 158],
162: [148, 149, 166, 167, 172],
163: [164, 167, 168, 169, 171, 172],
164: [163, 72, 169, 171, 173, 78, 145, 146],
165: [174, 166],
166: [162, 148, 165, 186, 188],
167: [162, 163, 170, 171, 172],
168: [163, 38, 169, 74, 172],
169: [163, 164, 71, 72, 74, 168],
170: [167],
171: [163, 164, 173, 167],
172: [34, 162, 163, 149, 38, 167, 168],
173: [145, 171, 164],
174: [165],
175: [2, 5, 138, 143, 177, 178, 179, 182],
176: [177, 178, 180, 181, 184, 92, 142],
177: [176, 178, 179, 181, 175],
178: [176, 177, 180, 5, 175],
179: [177, 181, 183, 138, 175],
180: [0, 176, 178, 114, 5, 119, 184],
181: [176, 177, 179, 183, 142],
182: [0, 2, 5, 175],
183: [179, 181, 138, 141, 142],
184: [176, 180, 119, 92, 93],
185: [186, 187, 188],
186: [148, 166, 185, 187, 188],
187: [148, 150, 185, 186, 188],
188: [185, 186, 187, 166],
189: [159, 190, 191],
190: [160, 189, 159],
191: [189],
192: [193, 194, 153, 141, 142, 95],
193: [192, 195, 75, 108, 145, 146, 147, 153],
194: [192, 92, 142, 95],
195: [193, 147]}
# Dense form of the Queen weights as an integer table; 1 means both provinces are neighbors.
queen_dense, queen_ids = w_queen.full()
pd.DataFrame(queen_dense, queen_ids).astype(int)
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 191 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 192 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 |
| 193 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
| 194 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 195 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
196 rows × 196 columns
# Show the `pct_nonzero` attribute of the Queen weights.
w_queen.pct_nonzero
2.7332361516034984
# Show the `islands` attribute of the Queen weights (an empty list here).
w_queen.islands
[]
# Switch the Queen weights to the 'R' transform.
w_queen.transform = 'R'
# Sum each row of the dense weights matrix; every row adds up to 1.0 after the transform.
pd.DataFrame(*w_queen.full()).sum(axis=1)
0 1.0
1 1.0
2 1.0
3 1.0
4 1.0
...
191 1.0
192 1.0
193 1.0
194 1.0
195 1.0
Length: 196, dtype: float64
from esda.moran import Moran

# Global Moran's I of the 2020 indicator, using the queen-contiguity weights
moranCOVID = Moran(
    y=covid_provYear_Alarm_map['Año_2020_qt'],
    w=w_queen,
)
# Statistic and its permutation-based pseudo p-value
(moranCOVID.I, moranCOVID.p_sim)
(0.10863623693995704, 0.007)
from splot.esda import moran_scatterplot
import matplotlib.pyplot as plt
# Scatterplot for the global Moran result computed above (variable vs. its spatial lag)
fig, ax = moran_scatterplot(moranCOVID)
# Relabel both axes of the returned Axes
ax.set_xlabel('Covid_criterio_share')
ax.set_ylabel('SpatialLag_Covid_criterio_share')
Text(0, 0.5, 'SpatialLag_Covid_criterio_share')
# The scatterplot with local info
from esda.moran import Moran_Local
# Calculate the local Moran statistics (fixed seed so the permutation results are
# reproducible) and plot them, highlighting observations with p < 0.05.
# NOTE(review): the global Moran above was computed with w_queen, while this call
# uses w_knn (defined outside this section) -- confirm that mixing the two weight
# schemes is intentional.
lisaCOVID = Moran_Local(y=covid_provYear_Alarm_map['Año_2020_qt'], w=w_knn,seed=2022)
fig, ax = moran_scatterplot(lisaCOVID,p=0.05)
# NOTE(review): these labels say 'criterios' while the global scatterplot says 'criterio'.
ax.set_xlabel('Covid_criterios_share')
ax.set_ylabel('SpatialLag_Covid_criterios_share');
from splot.esda import plot_local_autocorrelation
# Combined local-autocorrelation figure for the 'Año_2020_qt' column of the map
plot_local_autocorrelation(lisaCOVID, covid_provYear_Alarm_map,'Año_2020_qt')
plt.show()
# The map with the spots and outliers, drawn on a dedicated 12x12 figure
from splot.esda import lisa_cluster
f, ax = plt.subplots(1, figsize=(12, 12))
plt.title('Spots and Outliers')
# NOTE(review): splot documents lisa_cluster as returning (fig, ax); if so, `fig`
# is bound to that tuple here rather than to a Figure -- confirm before reusing it.
fig = lisa_cluster(lisaCOVID,
covid_provYear_Alarm_map,ax=ax,
legend_kwds={'loc': 'center left',
'bbox_to_anchor': (0.7, 0.6)})
# Quadrant code (1 to 4) assigned to each observation by the local Moran analysis
lisaCOVID.q
array([4, 1, 1, 1, 1, 2, 1, 3, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 2,
4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 2, 3, 3, 3, 3, 3, 4, 4, 3, 4, 3,
3, 1, 1, 4, 2, 4, 4, 3, 3, 1, 1, 1, 4, 1, 2, 1, 1, 1, 1, 1, 1, 2,
4, 1, 2, 4, 1, 2, 4, 4, 4, 1, 3, 1, 4, 1, 1, 1, 1, 1, 1, 4, 2, 4,
3, 4, 3, 4, 4, 3, 4, 1, 3, 3, 1, 1, 4, 4, 1, 2, 4, 1, 2, 3, 1, 1,
1, 1, 1, 1, 2, 1, 4, 1, 1, 4, 1, 3, 1, 4, 1, 1, 1, 2, 4, 4, 3, 1,
1, 2, 2, 3, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 4, 3, 4, 3, 4, 4, 4, 1,
1, 1, 1, 1, 1, 1, 1, 1, 4, 2, 4, 4, 4, 2, 4, 3, 1, 1, 4, 1, 4, 1,
4, 4, 4, 2, 4, 3, 1, 1, 4, 4, 3, 3, 3, 1, 1, 1, 1, 1, 1, 2])
# Pseudo p-value of each observation's local statistic (compared against 0.05 further down)
lisaCOVID.p_sim
array([0.194, 0.039, 0.403, 0.037, 0.451, 0.156, 0.173, 0.141, 0.107,
0.016, 0.105, 0.03 , 0.032, 0.008, 0.119, 0.027, 0.082, 0.026,
0.004, 0.045, 0.25 , 0.465, 0.002, 0.074, 0.118, 0.06 , 0.143,
0.459, 0.09 , 0.322, 0.181, 0.444, 0.303, 0.123, 0.1 , 0.118,
0.461, 0.437, 0.256, 0.09 , 0.012, 0.274, 0.245, 0.101, 0.332,
0.112, 0.495, 0.066, 0.466, 0.141, 0.017, 0.186, 0.273, 0.035,
0.084, 0.433, 0.424, 0.033, 0.352, 0.473, 0.298, 0.146, 0.324,
0.22 , 0.262, 0.079, 0.458, 0.5 , 0.357, 0.397, 0.214, 0.208,
0.419, 0.143, 0.352, 0.067, 0.206, 0.361, 0.354, 0.444, 0.282,
0.239, 0.107, 0.16 , 0.138, 0.34 , 0.262, 0.162, 0.27 , 0.003,
0.01 , 0.011, 0.165, 0.095, 0.12 , 0.162, 0.083, 0.085, 0.197,
0.113, 0.361, 0.338, 0.057, 0.382, 0.463, 0.265, 0.467, 0.326,
0.356, 0.405, 0.486, 0.437, 0.002, 0.034, 0.427, 0.274, 0.194,
0.002, 0.016, 0.011, 0.453, 0.196, 0.002, 0.355, 0.269, 0.421,
0.153, 0.481, 0.066, 0.017, 0.407, 0.302, 0.344, 0.486, 0.451,
0.078, 0.388, 0.022, 0.199, 0.03 , 0.074, 0.267, 0.15 , 0.009,
0.021, 0.41 , 0.427, 0.415, 0.004, 0.414, 0.004, 0.114, 0.013,
0.498, 0.216, 0.141, 0.273, 0.245, 0.156, 0.119, 0.177, 0.266,
0.154, 0.255, 0.38 , 0.048, 0.058, 0.156, 0.298, 0.427, 0.19 ,
0.226, 0.338, 0.351, 0.173, 0.457, 0.432, 0.371, 0.086, 0.479,
0.306, 0.322, 0.333, 0.241, 0.003, 0.008, 0.028, 0.024, 0.027,
0.146, 0.183, 0.178, 0.018, 0.338, 0.195, 0.355])
# How many observations fall in each quadrant, regardless of significance
pd.Series(lisaCOVID.q).value_counts()
1 77 4 61 3 37 2 21 Name: count, dtype: int64
# Keep the quadrant code only where the pseudo p-value is below 0.05;
# every other observation is coded 0 (not significant).
covid_provYear_Alarm_map['COVID_quadrant'] = [
    quadrant if p_value < 0.05 else 0
    for quadrant, p_value in zip(lisaCOVID.q, lisaCOVID.p_sim)
]
# Frequency of each resulting code
covid_provYear_Alarm_map['COVID_quadrant'].value_counts()
COVID_quadrant 0 157 4 17 1 14 3 8 Name: count, dtype: int64
# Human-readable name for every quadrant code; the list position is the code itself
labels = [
    '0 no_sig',
    '1 hotSpot',
    '2 coldOutlier',
    '3 coldSpot',
    '4 hotOutlier',
]
# Translate each numeric code into its label by positional lookup
covid_provYear_Alarm_map['COVID_quadrant_names'] = [
    labels[code] for code in covid_provYear_Alarm_map['COVID_quadrant']
]
# Frequency of each label
covid_provYear_Alarm_map['COVID_quadrant_names'].value_counts()
COVID_quadrant_names 0 no_sig 157 4 hotOutlier 17 1 hotSpot 14 3 coldSpot 8 Name: count, dtype: int64
from matplotlib import colors

# One colour per entry of `labels`, in the same order:
# no_sig, hotSpot, coldOutlier, coldSpot, hotOutlier.
myColMap = colors.ListedColormap(['ghostwhite', 'red', 'green', 'black', 'orange'])

f, ax = plt.subplots(1, figsize=(12, 12))
plt.title('Spots and Outliers')
covid_provYear_Alarm_map.plot(column='COVID_quadrant_names',
                              categorical=True,
                              # Pass the full label list: otherwise the categories are
                              # built only from the labels present in the data and the
                              # colours above stop lining up with their quadrant as
                              # soon as one label is missing.
                              categories=labels,
                              cmap=myColMap,
                              linewidth=0.1,
                              edgecolor='white',
                              legend=True,
                              legend_kwds={'loc': 'center left',
                                           'bbox_to_anchor': (0.7, 0.6)},
                              ax=ax)
# Remove axis
ax.set_axis_off()
# Display the map
plt.show()
# Interactive version of the spots/outliers map.
# `categories=labels` fixes the label -> colour assignment to the order of myColMap;
# without it only the labels present in the column are enumerated, so a missing
# label (e.g. no significant '2 coldOutlier') shifts every later colour.
covid_provYear_Alarm_map.explore("COVID_quadrant_names",
                                 categorical=True,
                                 categories=labels,
                                 tooltip='location',
                                 cmap=myColMap)